#!/bin/bash

# Site root, and the index page that is scanned for the per-year report link.
base_url="http://www.stats.gov.cn"
list_url="${base_url}/tjsj/tjgb/ndtjgb/"

# Example of a report URI (for 2021): "/tjsj/zxfb/202202/t20220227_1827960.html"

# The single positional argument selects which year's report to look up.
year=${1:-}

# Quoted so that an empty or whitespace-containing argument is tested as one
# word instead of relying on / breaking the `[` expression.
if [ -z "$year" ]; then
	echo "usage: $0 <year>"
	echo "example: $0 2021"
	exit 1
fi

# Fetch the index page and pull out the href value from the first line that
# mentions "tjsj" followed by "<year>年". Only the first non-empty match is
# kept: several matching lines would otherwise produce a multi-line value that
# cannot be used as a single URL.
year_uri=$(curl -s -L "$list_url" \
	| grep -E "tjsj.*${year}年" \
	| awk -F href= '{ print $2 }' \
	| awk 'NF { print $1; exit }' \
	| sed 's/"//g')

#echo "$year uri: $year_uri"

# Check the extracted URI itself. The composed URL always contains the site
# root, so testing it could never detect a failed lookup.
if [ -z "$year_uri" ]; then
	echo "err: parse year url failed!" >&2
	exit 2
fi

# Build the full report URL according to the form of the href.
case "$year_uri" in
	http://*|https://*)
		# Already a complete URL.
		year_url="$year_uri"
		;;
	/*)
		# Root-relative path: prefix the site root.
		year_url="${base_url}${year_uri}"
		;;
	*)
		# Path relative to the index page (e.g. "./202202/x.html").
		year_url="${list_url}${year_uri}"
		;;
esac

echo "$year URL: $year_url"

#######################################
# Print the text that follows the first occurrence of a key in a page, up to
# the next full-width comma, with HTML tags and [..] markers removed.
# Arguments:
#   $1 - key to look for (passed to awk -F, so it is interpreted as a regex)
#   $2 - page content; may span several lines
# Outputs:
#   the extracted text on stdout (an empty line when the key is absent)
#######################################
extract_field() {
	local key=$1 html=$2 flat

	# Collapse the page into a single line with single spaces so that awk
	# sees one record. This is done explicitly rather than through an
	# unquoted expansion, which would also glob-expand characters like '*'.
	flat=$(printf '%s' "$html" | tr '\t\n' '  ' | tr -s ' ')
	flat=${flat# }
	flat=${flat% }

	# The bracket pattern is non-greedy on purpose: each [..] marker is
	# removed separately so text between two markers survives.
	printf '%s\n' "$flat" \
		| awk -F "$key" '{ print $2 }' \
		| awk -F '，' '{ print $1 }' \
		| sed -e 's/<[^>]*>//g' -e 's/\[[^]]*\]//g'
}

# Download the report page for the selected year.
report=$(curl -s -L "$year_url")

# Extracted values, indexed by key.
declare -A fields

# Associative arrays do not keep insertion order, so an indexed list drives
# the loop and fixes the order in which results are printed.
key_list=("全国人口" "生产总值" "全国大陆总人口" "一般公共预算收入" "一般公共预算支出")

for key in "${key_list[@]}"; do
	fields[$key]=$(extract_field "$key" "$report")
	echo "$key: ${fields[$key]}"
done

# Reached after every key has been printed; always reports success, even when
# some values came back empty.
exit 0

